import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')


# Load the sleep-health dataset.
df = pd.read_csv('data.csv')

# pandas >= 2.0 treats the literal string "None" as a missing value while
# parsing CSVs, which turns the "no disorder" label of 'Sleep Disorder' into
# NaN. Restore the label so the column holds the same three categories
# ('None', 'Sleep Apnea', 'Insomnia') on every pandas version.
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

# Summarise column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           374 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


# List the distinct job titles present in the dataset.
occupations = df['Occupation'].unique()
print('Unique Values of Occupation are', occupations)

Unique Values of Occupation are ['Software Engineer' 'Doctor' 'Sales Representative' 'Teacher' 'Nurse'
 'Engineer' 'Accountant' 'Scientist' 'Lawyer' 'Salesperson' 'Manager']


# List the distinct BMI labels present in the dataset.
bmi_categories = df['BMI Category'].unique()
print('Unique Values of BMI Category are', bmi_categories)

Unique Values of BMI Category are ['Overweight' 'Normal' 'Obese' 'Normal Weight']


# List the distinct sleep-disorder labels present in the dataset.
sleep_disorders = df['Sleep Disorder'].unique()
print('Unique Values of Sleep Disorder are', sleep_disorders)

Unique Values of Sleep Disorder are ['None' 'Sleep Apnea' 'Insomnia']


# Inspect the raw blood-pressure strings before parsing them.
bp_readings = df['Blood Pressure'].unique()
bp_readings

array(['126/83', '125/80', '140/90', '120/80', '132/87', '130/86',
       '117/76', '118/76', '128/85', '131/86', '128/84', '115/75',
       '135/88', '129/84', '130/85', '115/78', '119/77', '121/79',
       '125/82', '135/90', '122/80', '142/92', '140/95', '139/91',
       '118/75'], dtype=object)


# Split the "<high>/<low>" blood-pressure strings into two float columns.
bp_parts = df['Blood Pressure'].str.split('/', expand=True).astype(float)
bp_parts.columns = ['BloodPressure_high', 'BloodPressure_low']

# df1 = every original column except the raw string, followed by the two
# parsed readings. df itself is left untouched.
df1 = df.drop(columns='Blood Pressure').join(bp_parts)

df1.head()


# Correlation heatmap of the numeric features (the ID column is excluded).
plt.figure(figsize=(10, 6))
# numeric_only=True: df1 still contains text columns (Gender, Occupation, ...).
# pandas >= 2.0 raises on them instead of silently skipping them, so ask for
# the numeric subset explicitly.
corr_matrix = df1.drop('Person ID', axis=1).corr(numeric_only=True)
sns.heatmap(corr_matrix, annot=True, fmt="1.1f");


# Pairwise scatter/density grid of all features except the ID column,
# coloured by sleep disorder.
pairplot_data = df1.drop(columns='Person ID')
sns.pairplot(pairplot_data, hue='Sleep Disorder');


# Numeric columns of df1; each one gets its own panel in the plot grids below.
num_col = [
    'Age',
    'Sleep Duration',
    'Quality of Sleep',
    'Physical Activity Level',
    'Stress Level',
    'Heart Rate',
    'Daily Steps',
    'BloodPressure_high',
    'BloodPressure_low',
]

# Categorical (text-valued) columns of df1.
cat_col = [
    'Gender',
    'Occupation',
    'BMI Category',
    'Sleep Disorder',
]


# Distribution of every numeric feature, split by sleep disorder (3x3 grid).
fig = plt.figure(figsize=(10, 10))

for position, column in enumerate(num_col, start=1):
    ax = plt.subplot(3, 3, position)
    ax.set_title(column)
    sns.histplot(data=df1, x=column, hue='Sleep Disorder', ax=ax)
    # Keep the legend seaborn builds (its entries are already matched to the
    # hue colours) and only shrink its text. Re-creating it with
    # plt.legend(labels=...) pairs the labels with artists in draw order,
    # which is not the order of .unique(), so categories can be mislabelled.
    hue_legend = ax.get_legend()
    if hue_legend is not None:
        plt.setp(hue_legend.get_texts(), fontsize=7)
        hue_legend.get_title().set_fontsize(7)
plt.tight_layout()
plt.show()


# Distribution of every numeric feature, split by BMI category (3x3 grid).
fig = plt.figure(figsize=(10, 10))

for position, column in enumerate(num_col, start=1):
    ax = plt.subplot(3, 3, position)
    ax.set_title(column)
    sns.histplot(data=df1, x=column, hue='BMI Category', ax=ax)
    # Keep the legend seaborn builds (its entries are already matched to the
    # hue colours) and only shrink its text. Re-creating it with
    # plt.legend(labels=...) pairs the labels with artists in draw order,
    # which is not the order of .unique(), so categories can be mislabelled.
    hue_legend = ax.get_legend()
    if hue_legend is not None:
        plt.setp(hue_legend.get_texts(), fontsize=6)
        hue_legend.get_title().set_fontsize(6)
plt.tight_layout()
plt.show()


# Horizontal boxplots of every numeric feature, one box per gender (3x3 grid).
fig = plt.figure(figsize=(8, 8))

for position, column in enumerate(num_col, start=1):
    panel = fig.add_subplot(3, 3, position)
    panel.set_title(column)
    sns.boxplot(data=df1, x=column, y='Gender', ax=panel)

plt.tight_layout()
plt.show()


# Horizontal boxplots of every numeric feature, one box per occupation
# (3x3 grid; wider figure to leave room for the occupation names).
fig = plt.figure(figsize=(15, 8))

for position, column in enumerate(num_col, start=1):
    panel = fig.add_subplot(3, 3, position)
    panel.set_title(column)
    sns.boxplot(data=df1, x=column, y='Occupation', ax=panel)

plt.tight_layout()
plt.show()


# Horizontal boxplots of every numeric feature, one box per BMI category
# (3x3 grid).
fig = plt.figure(figsize=(15, 8))

for slot, feature in enumerate(num_col):
    plt.subplot(3, 3, slot + 1)
    plt.title(feature)
    sns.boxplot(data=df1, x=feature, y='BMI Category')

plt.tight_layout()
plt.show()


# Horizontal boxplots of every numeric feature, one box per sleep disorder
# (3x3 grid).
fig = plt.figure(figsize=(8, 8))

for slot, feature in enumerate(num_col):
    plt.subplot(3, 3, slot + 1)
    plt.title(feature)
    sns.boxplot(data=df1, x=feature, y='Sleep Disorder')

plt.tight_layout()
plt.show()


# Age vs. sleep duration, coloured by BMI category.
plt.figure(figsize=(5, 5))
plt.tick_params(labelsize=10)
# No `sizes=` here: it only takes effect together with a `size=` variable.
ax = sns.scatterplot(data=df1, x='Age', y='Sleep Duration', hue='BMI Category')
plt.xticks(rotation=90)
# The legend font size is set here, once the points exist. Calling
# plt.legend() before anything is drawn finds no labelled artists, emits a
# "No artists with labels found" message, and its fontsize is discarded.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10)

# Reference line drawn from (25, 5.5) to (60, 8.5).
x_lim = [25, 60]
y_lim = [5.5, 8.5]
plt.plot(x_lim, y_lim, color="red");

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


# Replace the BMI labels with ordinal integer codes so they can be averaged
# and plotted on a numeric axis.
# NOTE(review): 'Normal' and 'Normal Weight' get different codes (0 and 1);
# if they are meant to be the same category this skews the ordinal scale -
# confirm against the dataset description.
bmi_codes = {
    'Normal': 0,
    'Normal Weight': 1,
    'Overweight': 2,
    'Obese': 3,
}
df1 = df1.replace({'BMI Category': bmi_codes})


# Age vs. BMI category, coloured by sleep duration.
plt.figure(figsize=(5, 5))
plt.tick_params(labelsize=10)
# No `sizes=` here: it only takes effect together with a `size=` variable.
ax = sns.scatterplot(data=df1, x='Age', y='BMI Category', hue='Sleep Duration')
plt.xticks(rotation=90)
# The legend font size is set here, once the points exist. Calling
# plt.legend() before anything is drawn finds no labelled artists, emits a
# "No artists with labels found" message, and its fontsize is discarded.
ax.legend(loc='upper left', bbox_to_anchor=(1, 1), fontsize=10)

# Reference line drawn from (25, 0) to (60, 3).
x_lim = [25, 60]
y_lim = [0, 3]
plt.plot(x_lim, y_lim, color="red");

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


# Summary statistics of Age, used to choose the age-bin edges.
age_summary = df1['Age'].describe()
age_summary

count    374.000000
mean      42.184492
std        8.673133
min       27.000000
25%       35.250000
50%       43.000000
75%       50.000000
max       59.000000
Name: Age, dtype: float64


# Bucket ages into (0, 30], (30, 40], (40, 50], (50, 60].
# labels=False stores the integer bin index (0-3) instead of the interval.
age_edges = [0, 30, 40, 50, 60]
df1['Age_bin'] = pd.cut(df1['Age'], bins=age_edges, labels=False)

df1


# Mean BMI code per age bin.
mean_bmi_by_age = df1.groupby('Age_bin')['BMI Category'].mean()
mean_bmi_by_age.plot.line();


# Mean sleep duration per age bin.
mean_sleep_by_age = df1.groupby('Age_bin')['Sleep Duration'].mean()
mean_sleep_by_age.plot.line();


# Spread of the BMI code within each age bin.
sns.boxplot(data=df1, x='Age_bin', y='BMI Category');


# Spread of sleep duration within each age bin.
sns.boxplot(data=df1, x='Age_bin', y='Sleep Duration');


# Age-bin distribution per occupation; tick labels are tilted to fit the
# occupation names.
sns.boxplot(data=df1, x='Occupation', y='Age_bin')
plt.xticks(rotation=45);


# BMI-code distribution per occupation.
sns.boxplot(data=df1, x='Occupation', y='BMI Category')
plt.xticks(rotation=45);


# Sleep-duration distribution per occupation.
sns.boxplot(data=df1, x='Occupation', y='Sleep Duration')
plt.xticks(rotation=45);

	Person ID	Gender	Age	Occupation	Sleep Duration	Quality of Sleep	Physical Activity Level	Stress Level	BMI Category	Heart Rate	Daily Steps	Sleep Disorder	BloodPressure_high	BloodPressure_low	Age_bin
0	1	Male	27	Software Engineer	6.1	6	42	6	2	77	4200	None	126.0	83.0	0
1	2	Male	28	Doctor	6.2	6	60	8	0	75	10000	None	125.0	80.0	0
2	3	Male	28	Doctor	6.2	6	60	8	0	75	10000	None	125.0	80.0	0
3	4	Male	28	Sales Representative	5.9	4	30	8	3	85	3000	Sleep Apnea	140.0	90.0	0
4	5	Male	28	Sales Representative	5.9	4	30	8	3	85	3000	Sleep Apnea	140.0	90.0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
369	370	Female	59	Nurse	8.1	9	75	3	2	68	7000	Sleep Apnea	140.0	95.0	3
370	371	Female	59	Nurse	8.0	9	75	3	2	68	7000	Sleep Apnea	140.0	95.0	3
371	372	Female	59	Nurse	8.1	9	75	3	2	68	7000	Sleep Apnea	140.0	95.0	3
372	373	Female	59	Nurse	8.1	9	75	3	2	68	7000	Sleep Apnea	140.0	95.0	3
373	374	Female	59	Nurse	8.1	9	75	3	2	68	7000	Sleep Apnea	140.0	95.0	3

Table of contents

Importing

Data Outline and Preprocessing

Visualization

Analysis - "Relationship between sleep duration and body mass index depends on age"

	Person ID	Gender	Age	Occupation	Sleep Duration	Quality of Sleep	Physical Activity Level	Stress Level	BMI Category	Heart Rate	Daily Steps	Sleep Disorder	BloodPressure_high	BloodPressure_low
0	1	Male	27	Software Engineer	6.1	6	42	6	Overweight	77	4200	None	126.0	83.0
1	2	Male	28	Doctor	6.2	6	60	8	Normal	75	10000	None	125.0	80.0
2	3	Male	28	Doctor	6.2	6	60	8	Normal	75	10000	None	125.0	80.0
3	4	Male	28	Sales Representative	5.9	4	30	8	Obese	85	3000	Sleep Apnea	140.0	90.0
4	5	Male	28	Sales Representative	5.9	4	30	8	Obese	85	3000	Sleep Apnea	140.0	90.0